# -*- coding: utf-8 -*-
import requests
from lxml import etree
import time
import json

# Request headers: impersonate a desktop Chrome browser so the site serves
# the normal HTML pages instead of blocking the scraper.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}

# Site root; relative detail-page hrefs scraped from list pages are joined onto this.
domain = "https://xiaohua.zol.com.cn"

# Accumulates {"title": ..., "content": ...} dicts; dumped to joke.json in main().
joke_list = []

def parse_page(page_url):
    """Fetch one list page and scrape every joke detail page linked from it.

    Args:
        page_url: URL of a joke-list page on xiaohua.zol.com.cn.

    Side effects:
        Delegates to parse_detail(), which appends each joke to the
        module-level joke_list.
    """
    response = requests.get(page_url, headers=headers)
    tree = etree.HTML(response.text)
    # Each list item carries an "all-read" anchor pointing at the full joke.
    hrefs = tree.xpath(
        "//ul[@class='article-list']/li[@class='article-summary']"
        "//a[@class='all-read']/@href"
    )
    for href in hrefs:
        parse_detail(domain + href)
        time.sleep(3)  # polite pause between detail-page requests



def parse_detail(detail_url):
    """Fetch a single joke detail page and append it to joke_list.

    Args:
        detail_url: absolute URL of a joke detail page.

    Side effects:
        Appends {"title": ..., "content": ...} to the module-level joke_list
        and prints a progress message.
    """
    resp = requests.get(detail_url, headers=headers)
    parser = etree.HTML(resp.text)
    titles = parser.xpath("//h1[@class='article-title']/text()")
    if not titles:
        # Layout change or blocked request: the original code raised
        # IndexError here and killed the whole crawl; skip the page instead.
        print(f"{detail_url}解析失败，跳过")
        return
    # Strip surrounding whitespace/newlines, same as is done for the content.
    joke_title = titles[0].strip()
    joke_content = "".join(parser.xpath("//div[@class='article-text']//text()")).strip()
    joke_list.append({
        "title": joke_title,
        "content": joke_content
    })
    print(f"{joke_title}笑话下载完毕!")


def main():
    """Scrape list pages 1-10 of the cold-jokes section and save to joke.json."""
    # List pages on the site are 1-indexed ("lengxiaohua/1.html", ...).
    # The original range(11) started at 0 and requested a non-existent
    # page 0 — fixed to range(1, 11), i.e. the first 10 pages.
    for page in range(1, 11):
        page_url = f"https://xiaohua.zol.com.cn/lengxiaohua/{page}.html"
        parse_page(page_url)
        time.sleep(3)  # polite pause between list-page requests

    with open("joke.json", "w", encoding="utf-8") as fp:
        # ensure_ascii=False: without it the Chinese text would be stored
        # as \uXXXX escape sequences instead of readable characters.
        json.dump(joke_list, fp, ensure_ascii=False)

    print("=" * 30)
    print("笑话抓取完毕")


# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()